suppressPackageStartupMessages(library(tidyverse))
library(patchwork)
devtools::load_all('~/Google Drive/My Drive/Scripts/R_packages/myUtilities/')
## ℹ Loading myUtilities

Settings

# Project locations. Each directory string ends with a slash because the
# paths derived from them below are assembled with paste0().
data_dir <- "/Volumes/Mitsu_NGS_3/METTL2A/"
wd <- "~/Google Drive/My Drive/Analysis/METTL2A/"

# Make the analysis folder the working directory for the rest of the script.
setwd(wd)

# Output folders for figures and tables, both located under `wd`.
figdir <- paste0(
  wd, "Figures/Shortread/Stringtie_tximport_DESeq2/Correlation/"
)
tabledir <- paste0(wd, "Tables/Shortread/")

# Session-wide ggplot2 default: classic theme at base size 7 with the legend
# placed below the plot.
theme_set(theme_classic(base_size = 7) + theme(legend.position = "bottom"))

Read data

# Load the gzip-compressed TSV stored under `wd`. Column types are not
# specified, so readr guesses them and reports the guessed specification.
# NOTE(review): the file name suggests DESeq2 DEG results joined with
# methylation annotation (dated 2024-04-17) -- confirm against its producer.
shortread_stringtie_txi_DESeq2 <-
  paste0(
    wd,
    "Tables/Shortread/shortread_stringtie_txi_DESeq2_DEG_methylation_2024-04-17.tsv.gz"
  ) |>
  read_tsv()
## Rows: 12974 Columns: 27
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr  (9): gene_name, gene_id, methylation, isUp, isDown, common_DEGs, gene_t...
## dbl (18): siMETTL2A_baseMean, siMETTL2A_log2FoldChange, siMETTL2A_lfcSE, siM...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Print a preview of the table that was just loaded.
shortread_stringtie_txi_DESeq2
## # A tibble: 12,974 × 27
##    gene_name siMETTL2A_baseMean siMETTL2A_log2FoldChange siMETTL2A_lfcSE
##    <chr>                  <dbl>                    <dbl>           <dbl>
##  1 A1CF                  4720.                   -0.917           0.0782
##  2 A4GALT                 335.                    1.89            0.567 
##  3 AAAS                  3917.                   -0.298           0.139 
##  4 AACS                  4911.                   -0.146           0.337 
##  5 AADAT                   15.5                  -2.52            1.93  
##  6 AAGAB                 9386.                   -0.285           0.145 
##  7 AAK1                    54.6                   0.205           0.632 
##  8 AAMDC                 1677.                    0.763           0.529 
##  9 AAMP                 20411.                    0.0309          0.200 
## 10 AAR2                  4967.                   -0.106           0.401 
## # ℹ 12,964 more rows
## # ℹ 23 more variables: siMETTL2A_stat <dbl>, siMETTL2A_pvalue <dbl>,
## #   siMETTL2A_padj <dbl>, siMETTL2A_G_baseMean <dbl>,
## #   siMETTL2A_G_log2FoldChange <dbl>, siMETTL2A_G_lfcSE <dbl>,
## #   siMETTL2A_G_stat <dbl>, siMETTL2A_G_pvalue <dbl>, siMETTL2A_G_padj <dbl>,
## #   siMETTL2A_I_baseMean <dbl>, siMETTL2A_I_log2FoldChange <dbl>,
## #   siMETTL2A_I_lfcSE <dbl>, siMETTL2A_I_stat <dbl>, …
# Write the table into `tabledir` with `compression = "gz"` using the
# `export_tsv()` helper (presumably provided by myUtilities).
# NOTE(review): the export path printed by the helper suggests the file name
# is built from the object name plus the run date -- confirm in the helper.
export_tsv(
  shortread_stringtie_txi_DESeq2,
  outdir = tabledir,
  compression = "gz"
)
## 
## Exported to: ~/Google Drive/My Drive/Analysis/METTL2A/Tables/Shortread/shortread_stringtie_txi_DESeq2_2024-07-30.tsv.gz
## # A tibble: 12,974 × 27
##    gene_name siMETTL2A_baseMean siMETTL2A_log2FoldChange siMETTL2A_lfcSE
##    <chr>                  <dbl>                    <dbl>           <dbl>
##  1 A1CF                  4720.                   -0.917           0.0782
##  2 A4GALT                 335.                    1.89            0.567 
##  3 AAAS                  3917.                   -0.298           0.139 
##  4 AACS                  4911.                   -0.146           0.337 
##  5 AADAT                   15.5                  -2.52            1.93  
##  6 AAGAB                 9386.                   -0.285           0.145 
##  7 AAK1                    54.6                   0.205           0.632 
##  8 AAMDC                 1677.                    0.763           0.529 
##  9 AAMP                 20411.                    0.0309          0.200 
## 10 AAR2                  4967.                   -0.106           0.401 
## # ℹ 12,964 more rows
## # ℹ 23 more variables: siMETTL2A_stat <dbl>, siMETTL2A_pvalue <dbl>,
## #   siMETTL2A_padj <dbl>, siMETTL2A_G_baseMean <dbl>,
## #   siMETTL2A_G_log2FoldChange <dbl>, siMETTL2A_G_lfcSE <dbl>,
## #   siMETTL2A_G_stat <dbl>, siMETTL2A_G_pvalue <dbl>, siMETTL2A_G_padj <dbl>,
## #   siMETTL2A_I_baseMean <dbl>, siMETTL2A_I_log2FoldChange <dbl>,
## #   siMETTL2A_I_lfcSE <dbl>, siMETTL2A_I_stat <dbl>, …

Plot

# Hexagonal 2D histogram comparing the two log2 fold-change columns
# (siMETTL2A_G on x, siMETTL2A_I on y). The fill (bin count) uses a log10
# scale. Both axes are limited to [-10, 10]; rows falling outside that window
# or holding missing values are dropped by the binning stat with a warning.
# tune::coord_obs_pred() is the coordinate system tune provides for
# observed-vs-predicted plots (common range on both axes, per its docs).
shortread_stringtie_txi_DESeq2_2dhistogram <-
  ggplot(
    shortread_stringtie_txi_DESeq2,
    aes(x = siMETTL2A_G_log2FoldChange, y = siMETTL2A_I_log2FoldChange)
  ) +
  geom_hex(bins = 100) +
  scale_fill_viridis_c(trans = "log10") +
  xlim(-10, 10) +
  ylim(-10, 10) +
  tune::coord_obs_pred(ratio = 1)

# Save the figure into `figdir` via `ggsave_multiple_formats()` (presumably a
# myUtilities helper).
# NOTE(review): the helper appears to render the plot once per output format,
# which would explain the repeated "Removed ... rows" warnings -- confirm.
ggsave_multiple_formats(
  shortread_stringtie_txi_DESeq2_2dhistogram,
  width = 5, height = 5, fontsize = 7, outdir = figdir
)
## Warning: Removed 284 rows containing non-finite values (`stat_binhex()`).
## Removed 284 rows containing non-finite values (`stat_binhex()`).
## Removed 284 rows containing non-finite values (`stat_binhex()`).
## Removed 284 rows containing non-finite values (`stat_binhex()`).
## Removed 284 rows containing non-finite values (`stat_binhex()`).

 # Histogram of siMETTL2A_baseMean on a log10 x axis with a vertical reference
 # line at 100. The bin count is left at the ggplot2 default; values that are
 # not finite after the log10 transform are dropped with a warning.
 ggplot(shortread_stringtie_txi_DESeq2, aes(x = siMETTL2A_baseMean)) +
   geom_histogram() +
   scale_x_log10() +
   geom_vline(xintercept = 100)
## Warning: Transformation introduced infinite values in continuous x-axis
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 191 rows containing non-finite values (`stat_bin()`).

# Relate the G and I log2 fold changes using the
# `plot_2dhistogram_withcortest()` helper (presumably from myUtilities).
# Per the rendered output, it reports Spearman and Pearson correlation tests
# for the two columns.
plot_2dhistogram_withcortest(
  shortread_stringtie_txi_DESeq2,
  x = siMETTL2A_G_log2FoldChange,
  y = siMETTL2A_I_log2FoldChange
)
## Warning in cor.test.default(x = mf[[1L]], y = mf[[2L]], ...): Cannot compute
## exact p-value with ties
## # A tibble: 2 × 9
##   estimate statistic   p.value method         method_short alternative parameter
##      <dbl>     <dbl>     <dbl> <chr>          <chr>        <chr>           <int>
## 1    0.309   2.36e11 1.01e-278 Spearman's ra… Spearman     two.sided          NA
## 2    0.369   4.47e 1 0         Pearson's pro… Pearson      two.sided       12705
## # ℹ 2 more variables: conf.low <dbl>, conf.high <dbl>
## # A tibble: 2 × 1
##   msg                            
##   <chr>                          
## 1 Spearman: r = 0.31, p < 2.2e-16
## 2 Pearson: r = 0.37, p < 2.2e-16

Number of DEGs

# Number of genes in each `isUp` category; one row per category, with the
# tally in column `n`.
shortread_stringtie_txi_DESeq2 |>
  count(isUp, name = "n")
## # A tibble: 4 × 2
##   isUp       n
##   <chr>  <int>
## 1 common  1685
## 2 not     7112
## 3 only G  1998
## 4 only I  2179
# Number of genes in each `isDown` category; one row per category, with the
# tally in column `n`.
shortread_stringtie_txi_DESeq2 |>
  count(isDown, name = "n")
## # A tibble: 4 × 2
##   isDown     n
##   <chr>  <int>
## 1 common  1805
## 2 not     7244
## 3 only G  1838
## 4 only I  2087